package com.luis.toolsuite.isolationforest;

import java.io.Serializable;
import java.util.Random;

public class ITree implements Serializable{
	
	private static final long serialVersionUID = 470727887197134526L;

	// 被选中的属性索引
    public int attrIndex;
    // 被选中的属性的一个具体的值
    public double attrValue;
    // 树的总叶子节点数
    public int leafNodes;
    // 该节点在树中的高度
    public int curHeight;
    // 左右子树
    public ITree lTree, rTree;

    // 构造函数，初始化ITree中的值
    public ITree(int attrIndex, double attrValue) {
        // 默认高度，树的高度从0开始计算
        this.curHeight = 0;

        this.lTree = null;
        this.rTree = null;
        this.leafNodes = 1;
        this.attrIndex = attrIndex;
        this.attrValue = attrValue;
    }

    /**
     * 根据samples样本数据递归的创建 ITree 树
     */
	public static ITree createITree(double[][] samples, int curHeight, int limitHeight, Random random) {
    	
    	ITree iTree = null;

        /*************** 第一步：判断递归是否满足结束条件 **************/
        if (samples.length == 0) {
            return iTree;
        } else if (curHeight >= limitHeight || samples.length == 1) {
            int randomAttr = random.nextInt(samples[0].length);
            iTree = new ITree(randomAttr, samples[0][randomAttr]);
            iTree.leafNodes = samples.length;
            iTree.curHeight = curHeight;
            return iTree;
        }

        int rows = samples.length;
        int cols = samples[0].length;

        // 判断是否所有样本都一样，如果都一样构建也终止
        boolean isAllSame = IFUtils.isSampleAllSame(samples);
        if (isAllSame) {
            iTree = new ITree(0, samples[0][0]);
            iTree.leafNodes = samples.length;
            iTree.curHeight = curHeight;
            return iTree;
        }

        /***************** 第二步：不满足递归结束条件，继续递归产生子树 ***************/
        int attrIndex = random.nextInt(cols);

        // 计算被选维度的最大值和最小值
        double[] maxAndMin = calMaxAndMinOfOneAttr(attrIndex, samples);
    	double max = maxAndMin[0];
    	double min = maxAndMin[1];
        
    	// 当最大值和最小值一样，说明样本中随机选择的这个属性的值是一样的，这时用属性来区分样本没有意义，要重新选择属性
        while(min == max) {
        	attrIndex = (attrIndex + 1) % cols;
        	maxAndMin = calMaxAndMinOfOneAttr(attrIndex, samples);
        	max = maxAndMin[0];
        	min = maxAndMin[1];
        }

        // 计算划分属性值
        double attrDouble = random.nextDouble();
        // 如果attrValue取到最小值（random.nextDouble()=0），此时样本就会被全被分到右铡，生成的树不满足要求，要重新计算属性值。
        while(attrDouble == 0) {
        	attrDouble = random.nextDouble();
        }
        double attrValue = attrDouble * (max - min) + min;

        // 将所有的样本的attrIndex对应的属性与attrValue 进行比较以选出左右子树对应的样本
        int lnodes = 0, rnodes = 0;
        double curValue;
        for (int i = 0; i < rows; i++) {
            curValue = samples[i][attrIndex];
            if (curValue <= attrValue) {
                lnodes++;
            } else {
                rnodes++;
            }
        }

        double[][] lSamples = new double[lnodes][cols];
        double[][] rSamples = new double[rnodes][cols];

        lnodes = 0;
        rnodes = 0;
        for (int i = 0; i < rows; i++) {
            curValue = samples[i][attrIndex];
            if (curValue <= attrValue) {
                lSamples[lnodes++] = samples[i];
            } else {
                rSamples[rnodes++] = samples[i];
            }
        }

        // 创建父节点
        ITree parent = new ITree(attrIndex, attrValue);
        parent.leafNodes = rows;
        parent.curHeight = curHeight;
        parent.lTree = createITree(lSamples, curHeight + 1, limitHeight, random);
        parent.rTree = createITree(rSamples, curHeight + 1, limitHeight, random);

        return parent;
    }
	
    
    private static double[] calMaxAndMinOfOneAttr(int attrIndex, double[][] samples) {
    	double min, max;
        min = samples[0][attrIndex];
        max = min;
        for (int i = 1; i < samples.length; i++) {
            min = Math.min(min, samples[i][attrIndex]);
            max = Math.max(max, samples[i][attrIndex]);
        }
        return new double[] {max, min};
    }
}
